# Libraries
if (!require("pacman")) install.packages("pacman")
pacman::p_load(dplyr, ggplot2, tidyr, cowplot, viridis, forcats, radiant.data, purrr, phangorn, gridExtra, ggtree, htmlTable, ape, cluster, tibble, Biostrings, seqinr, stringr, scales)

1 Double-check ARIBA results with annotated sequences from Prokka

# Functions
mismatches <- function(query, ref) {
  pairwiseAlignment(ref, query, substitutionMatrix = "BLOSUM50",
                    gapOpening = 3, gapExtension = 1) %>%
    mismatchTable() %>%
    mutate(ID=names(query),
           Pos=PatternStart,
           Reference_AA=as.character(PatternSubstring),
           Sample_AA=as.character(SubjectSubstring)) %>%
    select(ID, Reference_AA, Sample_AA, Pos) %>%
    filter(Sample_AA != "X")
}  

# Import sequence data
gyrA <- readAAStringSet("../data/Prokka/DNA_gyrase_GYRA_aa_sequences.fa")
parC <- readAAStringSet("../data/Prokka/Topoisomerase_IV_subunit_A_aa_sequences.fa")
parE <- readAAStringSet("../data/Prokka/DNA_topoisomerase_IV_subunit_B_aa_sequences.fa")

# Identify results from ARIBA
mut_gyrA <- mut_quant %>%
  select(id, gyrA) %>%
  mutate(entries = sapply(strsplit(.$gyrA, ","),
                          FUN = function(x) {length(x)})) %>%
  separate(gyrA, into = as.character(c(1:max(.$entries)))) %>%
  select(-entries) %>%
  gather(key, value, `1`,`2`) %>%
  filter(is.na(value) == FALSE) %>%
  select(-key) %>%
  group_by(id) %>%
  mutate(test = 1:n()) %>%
  ungroup()

# Run mutation identification analysis on Prokka sequences and
# match to ARIBA results
gyrA_results <- bind_rows(lapply(seq_along(gyrA[-1]),
                            function(i) mismatches(gyrA[i + 1],
                                                   gyrA[1]))) %>%
  mutate(mutation = paste0(Reference_AA, Pos, Sample_AA)) %>%
  select(ID, mutation) %>%
  mutate(ID = sub(".faa", "", ID)) %>%
  dplyr::rename("ref" = ID,
                "gyrA_prokka" = mutation) %>%
  left_join(id_data, by = "ref") %>%
  select(id, gyrA_prokka) %>%
  mutate(gyrA_result = gyrA_prokka %>% # control for mutation within QRDR for gyrA
             str_extract_all("\\d+") %>% # from reprex package
             map(as.integer) %>% # converts all to integer
             map_lgl(~ any(.x >= 67L & .x <= 106L))) %>%
  filter(gyrA_result == TRUE) %>%
  group_by(id) %>%
  mutate(test = 1:n()) %>%
  ungroup() %>%
  left_join(mut_gyrA, by = c("id","test")) %>%
  select(-test) %>%
  dplyr::rename("gyrA_ariba" = value) %>%
  filter(id %not_in% c("2016-17-565-1-S",
                       "2016-02-428-2-S",
                       "2016-02-486-2-S",
                       "2016-02-732-2-S",
                       "2014-01-1741-1-S")) %>%
  select(-gyrA_result)

# Identify results from ARIBA
mut_parC <- mut_quant %>%
  select(id, parC) %>%
  mutate(entries = sapply(strsplit(.$parC, ","), FUN = function(x) {length(x)})) %>%
  separate(parC, into = as.character(c(1:max(.$entries)))) %>%
  select(-entries) %>%
  gather(key, value, `1`,`2`) %>%
  filter(is.na(value) == FALSE) %>%
  select(-key) %>%
  group_by(id) %>%
  mutate(test = 1:n()) %>%
  ungroup()

# Run mutation identification analysis on Prokka sequences and
# match to ARIBA results
parC_results <- bind_rows(lapply(seq_along(parC[-1]),
                                 function(i) mismatches(parC[i + 1],
                                                        parC[1]))) %>%
  mutate(Pos = Pos + 22) %>%
  mutate(mutation = paste0(Reference_AA, Pos, Sample_AA)) %>%
  select(ID, mutation) %>%
  mutate(ID = sub(".faa", "", ID)) %>%
  dplyr::rename("ref" = ID,
                "parC_prokka" = mutation) %>%
  left_join(id_data, by = "ref") %>%
  select(id, parC_prokka) %>%
  mutate(parC_result = parC_prokka %>%
             str_extract_all("\\d+") %>% 
             map(as.integer) %>% 
             map_lgl(~ any(.x >= 51L & .x <= 170L))) %>%
  filter(parC_result == TRUE) %>%
  group_by(id) %>%
  mutate(test = 1:n()) %>%
  ungroup() %>%
  left_join(mut_parC, by = c("id","test")) %>%
  select(-test) %>%
  dplyr::rename("parC_ariba" = value) %>%
  filter(id %not_in% c("2016-17-565-1-S",
                       "2016-02-428-2-S",
                       "2016-02-486-2-S",
                       "2016-02-732-2-S",
                       "2014-01-1741-1-S")) %>%
  select(-parC_result)

# Identify results from ARIBA
mut_parE <- mut_quant %>%
  select(id, parE) %>%
  mutate(entries = sapply(strsplit(.$parE, ","), FUN = function(x) {length(x)})) %>%
  separate(parE, into = as.character(c(1:max(.$entries)))) %>%
  select(-entries) %>%
  gather(key, value, `1`,`2`) %>%
  filter(is.na(value) == FALSE) %>%
  select(-key) %>%
  group_by(id) %>%
  mutate(test = 1:n()) %>%
  ungroup()

# Run mutation identification analysis on Prokka sequences and
# match to ARIBA results
parE_results <- bind_rows(lapply(seq_along(parE[-1]),
                                 function(i) mismatches(parE[i + 1],
                                                        parE[1]))) %>%
  #mutate(Pos = Pos + 22) %>%
  mutate(mutation = paste0(Reference_AA, Pos, Sample_AA)) %>%
  select(ID, mutation) %>%
  mutate(ID = sub(".faa", "", ID)) %>%
  dplyr::rename("ref" = ID,
                "parE_prokka" = mutation) %>%
  left_join(id_data, by = "ref") %>%
  select(id, parE_prokka) %>%
  mutate(parE_result = parE_prokka %>%
           str_extract_all("\\d+") %>% 
           map(as.integer) %>% 
           map_lgl(~ any(.x >= 366L & .x <= 523L))) %>%
  filter(parE_result == TRUE) %>%
  group_by(id) %>%
  mutate(test = 1:n()) %>%
  ungroup() %>%
  left_join(mut_parE, by = c("id","test")) %>%
  select(-test) %>%
  dplyr::rename("parE_ariba" = value) %>%
  filter(id %not_in% c("2016-17-565-1-S",
                       "2016-02-428-2-S",
                       "2016-02-486-2-S",
                       "2016-02-732-2-S",
                       "2014-01-1741-1-S")) %>%
  select(-parE_result)


full_report <- gyrA_results %>%
  left_join(parC_results, by = "id") %>%
  left_join(parE_results, by = "id") %>%
  group_by(id) %>%
  summarise_all(funs(func_paste)) %>%
  mutate(parC_ariba = ifelse(parC_prokka == "S80I" & 
                               parC_ariba == "S58I", "S80I", parC_ariba))

full_report

2 Tables

2.1 Isolate data

isolate_data_complete <- isolate_data %>%
  left_join(no_of_reads, by = "id")

isolate_data_complete

2.2 List of genes of interest

This table lists all the genes of interest in this study, some information about them, and their accession number(s).

# 
# acquired_ref_names <- acquired_data$ref_name
# acquired_ref <- c()
# 
# for (i in ac_genes) {
#   for (j in acquired_ref_names) {
#     if (grepl(i,j, ignore.case = T) == TRUE) {
#       acquired_ref <- c(acquired_ref, j)
#     }
#   }
# }
# 
# acquired_ref <- unique(acquired_ref)
# 
# acquired_ref_genes <- acquired_data %>%
#   mutate(test = duplicated(ref_name),
#          gene = case_when(grepl("qnra1", ref_name, ignore.case = T) == TRUE ~ "qnrA1",
#                           grepl("qnrb19", ref_name, ignore.case = T) == TRUE ~ "qnrB19",
#                           grepl("qnrb6", ref_name, ignore.case = T) == TRUE ~ "qnrB6",
#                           grepl("qnrb60", ref_name, ignore.case = T) == TRUE ~ "qnrB60",
#                           grepl("qnrs1", ref_name, ignore.case = T) == TRUE ~ "qnrS1",
#                           grepl("qnrs2", ref_name, ignore.case = T) == TRUE ~ "qnrS2",
#                           grepl("qnrs4", ref_name, ignore.case = T) == TRUE ~ "qnrs4",
#                           grepl("qnrs9", ref_name, ignore.case = T) == TRUE ~ "qnrS9",
#                           grepl("qep", ref_name, ignore.case = T) == TRUE ~ "qepA")) %>%
#   filter(test == FALSE,
#          ref_name %in% acquired_ref_names) %>%
#   select(ref_name, gene) %>%
#   mutate(compl = complete.cases(.)) %>%
#   filter(compl == TRUE) %>%
#   mutate(ref_name2 = sub(".+_(.*?)", "\\1", ref_name)) %>%
#   group_by(gene) %>%
#   summarise_all(funs(func_paste)) %>%
#   select(gene, ref_name2) %>%
#   dplyr::rename("Accession number(s)" = ref_name2,
#          "Gene" = gene) %>%
#   mutate(Description = "Quinolone resistance protein, plasmid mediated, decreases susceptibility of quinolones",
#          Type = "Acquired")
#   
# df_ref_names <- mut_data$cluster
# ref_names <- c()
# 
# for (i in mut_genes) {
#   for (j in df_ref_names) {
#     if (grepl(i,j, ignore.case = T) == TRUE) {
#       ref_names <- c(ref_names, j)
#     }
#   }
# }
# 
# ref_names <- unique(ref_names)
# 
# intrinsic_ref_genes <- mut_data %>%
#   mutate(test = duplicated(cluster),
#          gene = case_when(grepl("gyra", cluster, ignore.case = T) == TRUE ~ "gyrA",
#                           grepl("gyrb", cluster, ignore.case = T) == TRUE ~ "gyrB",
#                           grepl("parc", cluster, ignore.case = T) == TRUE ~ "parC",
#                           grepl("pare", cluster, ignore.case = T) == TRUE ~ "parE",
#                           grepl("soxS", cluster, ignore.case = T) == TRUE ~ "soxS",
#                           grepl("marR", cluster, ignore.case = T) == TRUE ~ "marR",
#                           grepl("mara", cluster, ignore.case = T) == TRUE ~ "marA",
#                           grepl("rpob", ref_name, ignore.case = T) == TRUE ~ "rpoB",
#                           grepl("rob", ref_name, ignore.case = T) == TRUE ~ "robA")) %>%
#   filter(test == FALSE,
#          cluster %in% ref_names) %>%
#   select(ref_name, gene, free_text) %>%
#   mutate(ref_name2 = sub("^.*?_([A-Z]+[0-9_.]*[0-9]).*", "\\1", ref_name)) %>%
#   select(-ref_name) %>%
#   group_by(gene) %>%
#   summarise_all(funs(func_paste)) %>%
#   select(gene, ref_name2) %>%
#   dplyr::rename("Accession number(s)" = ref_name2,
#          "Gene" = gene)
#   # mutate(Description = case_when(Gene == "gyrA" ~ ,
#   #                                Gene == "gyrB" ~ ,
#   #                                Gene == "marR" ~ ,
#   #                                Gene == "marA" ~ ,
#   #                                Gene == "parC" ~ ,
#   #                                Gene == "parE" ~ ,
#   #                                Gene == "soxS" ~ "Transcriptional activator of superoxide response regulon",
#   #                                Gene == "rpoB" ~ "RNA polymerase subunit B",
#   #                                Gene == "robA" ~ ),
#   #        Type = "Intrinsic gene")
#                 
# ref_genes <- rbind(intrinsic_ref_genes, acquired_ref_genes)
# 
# ref_genes

2.3 Isolates per species

per_species <- isolate_data %>%
  group_by(species) %>%
  dplyr::count()

per_species

2.4 Sequence types in total

tot_mlst <- mlst_data[,c("id","ST")] %>%
  group_by(ST) %>%
  dplyr::count() %>%
  arrange(desc(n))

print(paste0("Total amount of different sequence types: ", nrow(tot_mlst)))
## [1] "Total amount of different sequence types: 83"
tot_mlst

2.5 Sequence types per animal species

mlst_data[,c("id","ST")] %>%
  left_join(isolate_data[,c("id","species")], by = "id") %>%
  group_by(ST, species) %>%
  dplyr::count() %>%
  arrange(desc(n))

2.6 Most abundant (n > 3) sequence types per animal species

mlst_n <- mlst_data[,c("id","ST")] %>%
  group_by(ST) %>%
  dplyr::count() %>%
  filter(n > 3)

mlst_data[,c("id","ST")] %>%
  left_join(isolate_data[,c("id","species")], by = "id") %>%
  spread(ST, species) %>%
  summarise_all(funs(func_paste)) %>%
  select(-id) %>%
  gather(ST, species) %>%
  filter(ST %in% mlst_n$ST)

2.7 Number of sequence types per animal species

mlst_data[,c("id","ST")] %>%
  left_join(isolate_data[,c("id","species")], by = "id") %>%
  group_by(species) %>%
  mutate(n = length(unique(ST))) %>%
  select(species,n) %>%
  summarise_all(funs(func_paste))

2.8 Percent isolates with PMQR

acquired_report %>%
  select(-id) %>%
  mutate_all(funs(as.numeric(.))) %>%
  mutate(total = rowSums(.),
         result = if_else(total != 0, 1, 0)) %>%
  group_by(result) %>%
  dplyr::count() %>%
  ungroup() %>%
  mutate(result = if_else(result == 0, "Absent", "Present")) %>%
  spread(result, n) %>%
  mutate(Total = Present + Absent,
         Percent = round(Present/Total * 100,1))

2.9 Percent isolates with qnr family genes

acquired_report %>%
  select(-c(qepA4, id)) %>%
  mutate_all(funs(as.numeric)) %>%
  dplyr::rename("qnrA" = qnrA1) %>%
  mutate(qnrB = ifelse(rowSums(.[,2:4]) >= 1, 1, 0),
         qnrS = ifelse(rowSums(.[,5:7]) >= 1, 1, 0)) %>%
  select(qnrA, qnrB, qnrS) %>%
  gather(gene, value) %>%
  group_by(gene, value) %>%
  dplyr::count() %>%
  ungroup() %>%
  mutate(value = if_else(value == 0, "Absent", "Present")) %>%
  spread(value, n) %>%
  mutate(Total = Present + Absent,
         Percent = round(Present/Total * 100,1))

2.10 Percent isolates with specific PMQR genes

acquired_report %>%
  select(-id) %>%
  t() %>%
  as.data.frame(.) %>%
  mutate(Gene = rownames(.)) %>%
  mutate_at(vars(-Gene),
            funs(as.numeric(as.character(.)))) %>%
  mutate(Present = rowSums(.[,-281])) %>%
  select(c(Gene, Present)) %>%
  rowwise() %>%
  mutate(Total = 280,
         Percent = round(Present/Total * 100,1))

2.11 qnr-genes per animal species

qnr_genes <- c("qnrA1","qnrB19","qnrB6","qnrB60","qnrS1","qnrS2","qnrS4")

acquired_report %>%
  left_join(isolate_data, by = "id") %>%
  gather(key, value, qnr_genes) %>%
  group_by(key, value, species) %>%
  dplyr::count() %>%
  ungroup() %>%
  mutate(value = if_else(value == 1, "Present", "Absent")) %>%
  spread(value, n, fill = 0) %>%
  rowwise() %>%
  mutate(Total = Present + Absent,
         Percent = round(Present / Total * 100, 1)) %>%
  arrange(key, desc(Percent))

2.12 Percent isolates with mutations in QRDR

mut_report %>%
  select(gyrA, gyrB, parC, parE) %>%
  group_by_all() %>%
  dplyr::count() %>%
  ungroup() %>%
  mutate_at(vars(c(gyrA, gyrB, parC, parE)),
            .funs = funs(as.numeric)) %>%
  mutate(type = ifelse(rowSums(.[,1:4]) == 0, 1, 0)) %>%
  select(n, type) %>%
  group_by(type) %>%
  summarise_all(funs(sum)) %>%
  mutate(type = ifelse(type == 0, "Present", "Absent")) %>%
  spread(type, n) %>%
  mutate(Total = Present + Absent,
         Percent = round(Present/Total * 100,1)) %>%
  select(-Total)

2.13 Percent mutations in each intrinsic gene

mut_report %>%
  select(-nt_change) %>%
  gather(gene, value, -id) %>%
  group_by(gene, value) %>%
  dplyr::count() %>%
  ungroup() %>%
  mutate(value = ifelse(value == 1, "Present", "Absent")) %>%
  spread(value, n, fill = 0) %>%
  mutate(Total = Present + Absent,
         Percent = round(Present/Total * 100,1)) %>%
  arrange(desc(Percent))

2.14 Specific mutations in QRDR

suppressWarnings(clean_mut_data %>%
  mutate(ref_ctg_change = ifelse(ref_ctg_change == ".", "", ref_ctg_change),
         ref_nt = ifelse(ref_nt == ".", "", ref_nt),
         ctg_nt = ifelse(ctg_nt == ".", "", ctg_nt),
         ref_ctg_nt_change = paste(ref_nt, ctg_nt, sep = "-"),
         mutation = paste(ref_ctg_change, ref_ctg_nt_change, sep = "/"),
         mutation = ifelse(mutation == "/-", "0", mutation)) %>%
  select(ref, gene_names, flag, mutation) %>%
  filter(flag %in% flag_selection,
         gene_names %in% c("gyrA","gyrB","parC","parE")) %>%
  mutate(id = 1:n()) %>%
  spread(gene_names, mutation) %>%
  group_by(ref) %>%
  summarise_all(funs(func_paste2)) %>%
  select(-c(id, flag)) %>%
  left_join(id_data, by = "ref") %>%
  dplyr::select(id, everything(), -ref) %>%
  filter(id %not_in% c("2016-17-565-1-S",
                       "2016-02-428-2-S",
                       "2016-02-486-2-S",
                       "2016-02-732-2-S",
                       "2014-01-1741-1-S")) %>%
  gather(gene, mut, -id) %>%
  mutate(mut = ifelse(mut == "" | mut == "." | is.na(mut) == TRUE, 0, mut),
         result = ifelse(mut != 0, 1, 0),
         result = as.integer(result),
         type = "mut") %>%
  separate(mut, into = paste("v", 1:12, sep = "_"), sep = ",") %>%
  gather(mut, value, paste("v", 1:12, sep = "_")) %>%
  separate(value, into = c("aa_change","nt_change"), sep = "/") %>%
  mutate(aa_change = ifelse(gene == "parC" & aa_change == "S58I", "S80I", aa_change)) %>%
  select(-mut) %>%
  dplyr::rename("mut" = aa_change) %>%
  filter(is.na(nt_change) == FALSE) %>%
  fix_gyr_par_results(.) %>%
  filter(result_total == 1) %>%
  group_by(gene, mut, nt_change) %>%
  dplyr::count() %>%
  separate(nt_change, into = c("Reference nt", "Contig nt"), sep = "-") %>%
  rowwise() %>%
  mutate(Total = 280,
         Percent = round(n/Total * 100,1)) %>%
  dplyr::rename("Mutation" = mut) %>%
  arrange(gene, desc(Percent)))

2.15 Specific QRDR mutations per animal species

suppressWarnings(clean_mut_data %>%
  mutate(ref_ctg_change = ifelse(ref_ctg_change == ".", "", ref_ctg_change),
         ref_nt = ifelse(ref_nt == ".", "", ref_nt),
         ctg_nt = ifelse(ctg_nt == ".", "", ctg_nt),
         ref_ctg_nt_change = paste(ref_nt, ctg_nt, sep = "-"),
         mutation = paste(ref_ctg_change, ref_ctg_nt_change, sep = "/"),
         mutation = ifelse(mutation == "/-", "0", mutation)) %>%
  select(ref, gene_names, flag, mutation) %>%
  filter(flag %in% flag_selection,
         gene_names %in% c("gyrA","gyrB","parC","parE")) %>%
  mutate(id = 1:n()) %>%
  spread(gene_names, mutation) %>%
  group_by(ref) %>%
  summarise_all(funs(func_paste2)) %>%
  dplyr::select(-c(id, flag)) %>%
  left_join(id_data, by = "ref") %>%
  dplyr::select(id, everything(), -ref) %>%
  filter(id %not_in% c("2016-17-565-1-S",
                       "2016-02-428-2-S",
                       "2016-02-486-2-S",
                       "2016-02-732-2-S",
                       "2014-01-1741-1-S")) %>%
  gather(gene, mut, -id) %>%
  mutate(mut = ifelse(mut == "" | mut == "." | is.na(mut) == TRUE, 0, mut),
         result = ifelse(mut != 0, 1, 0),
         result = as.integer(result),
         type = "mut") %>%
  separate(mut, into = paste("v", 1:12, sep = "_"), sep = ",") %>%
  gather(mut, value, paste("v", 1:12, sep = "_")) %>%
  separate(value, into = c("aa_change","nt_change"), sep = "/") %>%
  mutate(aa_change = ifelse(gene == "parC" & aa_change == "S58I", "S80I", aa_change)) %>%
  select(-mut) %>%
  dplyr::rename("mut" = aa_change) %>%
  filter(is.na(nt_change) == FALSE) %>%
  fix_gyr_par_results(.) %>%
  filter(result_total == 1) %>%
  left_join(isolate_data[c("id","species")], by = "id") %>%
  group_by(gene, mut, species) %>%
  dplyr::count() %>%
  rowwise() %>%
  mutate(Total = case_when(species == "Broiler" ~ 87,
                           species == "Pig" ~ 75,
                           species == "Red fox" ~ 52,
                           species == "Wild bird" ~ 66),
         Percent = round(n/Total * 100,1)) %>%
  dplyr::rename("Mutation" = mut) %>%
  arrange(gene, desc(Percent)))

2.16 Mutation combinations in QRDR

mut_quant %>%
  group_by(gyrA, gyrB, parC, parE) %>%
  dplyr::count() %>%
  rowwise() %>%
  mutate(Total = 280,
         Percent = round(n/Total * 100,1)) %>%
  arrange(desc(Percent))

2.17 Mutations in intrinsic genes per animal species

gene_names <- names(mut_report)

gene_names <- gene_names[-c(1,2)]

mut_report %>%
  left_join(isolate_data, by = "id") %>%
  gather(key, value, gene_names) %>%
  group_by(key, value, species) %>%
  dplyr::count() %>%
  ungroup() %>%
  mutate(value = if_else(value == 1, "Present", "Absent")) %>%
  spread(value, n, fill = 0) %>%
  rowwise() %>%
  mutate(Total = Present + Absent,
         Percent = round(Present / Total * 100, 1)) %>%
  dplyr::rename("Gene" = key,
                "Species" = species) %>%
  dplyr::select(Gene, Species, Present, Total, Percent) %>%
  arrange(Gene, desc(Percent))

2.18 Specific mutations in non-QRDR intrinsic genes

These mutations are the most prominent in each gene where mutations were identified (n > 2). The percentage is based on how many of the total isolates with mutations in the respective gene had the specific mutation in question.

mut_filtered %>%
  filter(gene %not_in% c("gyrA","gyrB","parC","parE")) %>%
  select(id, gene, mut) %>%
  mutate(entries = sapply(strsplit(.$mut, ","),
                          FUN = function(x) {length(x)})) %>%
  separate(mut, into = paste("v", 1:max(.$entries), sep = "_"), sep = ",") %>%
  gather(mut, value, paste("v", 1:max(.$entries), sep = "_")) %>%
  mutate(value = str_trim(value)) %>%
  select(id, gene, value) %>%
  filter(value != 0,
         is.na(value) == FALSE) %>%
  group_by(gene, value) %>%
  dplyr::count() %>%
  ungroup() %>%
  filter(n > 2) %>%
  mutate(percent = case_when(
    gene == "gadX" ~ 
      round(n / length(which(mut_report[, "gadX"] == 1))*100, 1),
    gene == "marA" ~ 
      round(n / length(which(mut_report[, "marA"] == 1))*100, 1),
    gene == "marR" ~ 
      round(n / length(which(mut_report[, "marR"] == 1))*100, 1),
    gene == "rpoB" ~ 
      round(n / length(which(mut_report[, "rpoB"] == 1))*100, 1),
    gene == "soxS" ~ 
      round(n / length(which(mut_report[, "soxS"] == 1))*100, 1))) %>%
  arrange(gene, desc(percent)) %>%
  dplyr::rename("Gene" = gene,
                "Mutation" = value,
                "Percent" = percent)

2.19 Mechanisms in isolates without QRDR mutations

mut_report %>%
  filter(gyrA == 0,
         parC == 0,
         parE == 0) %>%
  left_join(acquired_report, by = "id") %>%
  mutate_at(vars(qnrA1,qnrB19,qnrB6,qnrB60,qnrS1,qnrS2,qnrS4),
            funs(as.numeric(.))) %>%
  mutate(qnr = ifelse(rowSums(.[,14:20]) == 0, 0, 1)) %>%
  select(-c(qnrA1,qnrB19,qnrB6,qnrB60,qnrS1,qnrS2,qnrS4)) %>%
  select(-c(nt_change, id)) %>%
  group_by_all() %>%
  dplyr::count() %>%
  ungroup() %>%
  dplyr::select(-c(gyrA,gyrB,parC,parE))

2.20 Total combinations of resistance mechanisms

mut_report %>%
  select(-c(nt_change, gyrA, parC, parE)) %>%
  left_join(mut_quant[c("gyrA","parC","parE","id")], by = "id") %>%
  left_join(acquired_report, by = "id") %>%
  mutate_at(vars(qnrA1,qnrB19,qnrB6,qnrB60,qnrS1,qnrS2,qnrS4),
            funs(as.numeric(.))) %>%
  mutate(qnr = ifelse(rowSums(.[,13:19]) == 0, 0, 1)) %>%
  left_join(mic_data[c("id","CIP","NAL")], by = "id") %>%
  select(-c(id,qnrA1,qnrB19,qnrB6,qnrB60,qnrS1,qnrS2,qnrS4)) %>%
  group_by_all() %>%
  dplyr::count() %>%
  arrange(desc(CIP))

2.21 Chi Squared test for significant differences in resistance mechanisms

do_chisq_test <- function(df) {
  df <- df %>%
    dplyr::slice(expand.grid(1:length(unique(species)), 1:length(unique(species))) %>% rev %>% 
          dplyr::filter(Var2 < Var1) %>% t) %>%
    mutate(test = rep(1:(n()/2), each = 2)) %>%
    group_by(test) %>%
    dplyr::do(data_frame(gene = .$key,
                  test = dplyr::first(.$test),
                  species1 = dplyr::first(.$species),
                  species2 = dplyr::last(.$species),
                  data = list(matrix(c(.$Absent,
                                       .$Present),
                                     ncol = 2)))) %>%
    mutate(chi_test = map(data, chisq.test, correct = FALSE)) %>%
    mutate(p.value = map_dbl(chi_test, "p.value"),
           dupl = duplicated(test)) %>%
    dplyr::filter(dupl == FALSE) %>%
    ungroup() %>%
    select(gene, species1, species2, p.value) %>%
    mutate(p.value = round(p.value, 5)) %>%
    dplyr::filter(p.value <= 0.05)
  return(df)
}

total_df <- acquired_report %>%
  left_join(mut_report, by = "id") %>%
  left_join(isolate_data[c("id", "species")], by = "id") %>%
  select(-nt_change)

mechanism_per_species <- total_df %>%
  select(id, species, everything()) %>%
  select(-id) %>%
  gather(key, value, -species) %>%
  group_by_all() %>%
  dplyr::count() %>%
  ungroup() %>%
  mutate(value = if_else(value == 1, "Present", "Absent")) %>%
  spread(value, n, fill = 0) %>%
  select(key, species, Present, Absent) %>%
  split(f = .$key)

intrinsic_names <- names(mut_report)
intrinsic_names <- intrinsic_names[-c(1,2)]
  
intrinsic_gene_perc <- mut_report %>%
  left_join(isolate_data, by = "id") %>%
  gather(key, value, intrinsic_names) %>%
  group_by(key, value, species) %>%
  dplyr::count() %>%
  ungroup() %>%
  mutate(value = if_else(value == 1, "Present", "Absent")) %>%
  spread(value, n, fill = 0) %>%
  rowwise() %>%
  mutate(Total = Present + Absent,
         Percent = round(Present / Total * 100, 1)) %>%
  select(key, species, Percent) %>%
  dplyr::rename("gene" = key)

acquired_names <- names(acquired_report)
acquired_names <- acquired_names[-1]

acquired_genes_perc <- acquired_report %>%
  left_join(isolate_data, by = "id") %>%
  gather(key, value, acquired_names) %>%
  group_by(key, value, species) %>%
  dplyr::count() %>%
  ungroup() %>%
  mutate(value = if_else(value == 1, "Present", "Absent")) %>%
  spread(value, n, fill = 0) %>%
  rowwise() %>%
  mutate(Total = Present + Absent,
         Percent = round(Present / Total * 100, 1)) %>%
  select(key, species, Percent) %>%
  dplyr::rename("gene" = key)

total_perc <- rbind(intrinsic_gene_perc, acquired_genes_perc)

p_values_total <- suppressWarnings(lapply(mechanism_per_species, function(x) do_chisq_test(x))) %>%
  bind_rows() %>%
  left_join(total_perc, by = c("gene", c("species1" = "species"))) %>%
  dplyr::rename("Percent species 1" = Percent) %>%
  left_join(total_perc, by = c("gene", c("species2" = "species"))) %>%
  dplyr::rename("Percent species 2" = Percent) %>%
  dplyr::select(gene, species1, `Percent species 1`, species2, `Percent species 2`, p.value)

p_values_total

2.22 Resistant isolates per antimicrobial per species

percent_am_res <- mic_data %>%
  left_join(isolate_data[,c("id","species")], by = "id") %>%
  mutate(SMX_R = ifelse(as.numeric(SMX) > 64, 1, 0),
         TMP_R = ifelse(as.numeric(TMP) > 2, 1, 0),
         CIP_R = ifelse(as.numeric(CIP) > 0.06, 1, 0),
         TET_R = ifelse(as.numeric(TET) > 8, 1, 0),
         NAL_R = ifelse(as.numeric(NAL) > 16, 1, 0),
         CTX_R = ifelse(as.numeric(CTX) > 0.25, 1, 0),
         CHL_R = ifelse(as.numeric(CHL) > 16, 1, 0),
         AMP_R = ifelse(as.numeric(AMP) > 8, 1, 0),
         GEN_R = ifelse(as.numeric(GEN) > 2, 1, 0)) %>%
  select(species,contains("_R")) %>%
  gather(compound, value, -species) %>%
  mutate(compound = sub("_R", "", compound)) %>%
  group_by_all() %>%
  dplyr::count() %>%
  ungroup() %>%
  mutate(value = ifelse(value == 0, "Absent", "Present")) %>%
  spread(value, n, fill = 0) %>%
  rowwise() %>%
  mutate(Total = Present + Absent,
         Percent = round((Present/Total)*100, 1))

percent_am_res

2.23 Resistance mechanisms per sequence type

mut_report %>%
  left_join(acquired_report, by = "id") %>%
  left_join(mlst_data[,c("id","ST")], by = "id") %>%
  select(-c(nt_change, id)) %>%
  gather(gene, value, -ST) %>%
  group_by_all() %>%
  dplyr::count() %>%
  ungroup() %>%
  mutate(value = ifelse(value == 1, "Present", "Absent")) %>%
  spread(value, n, fill = 0) %>%
  filter(Present != 0) %>%
  mutate(Total = Present + Absent) %>%
  filter(Total > 2) %>%
  rowwise() %>%
  mutate(Percent = round(Present/Total*100, 1)) %>%
  ungroup() %>%
  group_by(ST) %>%
  mutate(no_of_mechanisms = n())

2.24 Resistance mechanisms per sequence type per animal species

mut_report %>%
  left_join(acquired_report, by = "id") %>%
  left_join(mlst_data[,c("id","ST")], by = "id") %>%
  left_join(isolate_data[,c("id","species")], by = "id") %>%
  select(-c(nt_change, id)) %>%
  gather(gene, value, -c(ST, species)) %>%
  group_by_all() %>%
  dplyr::count() %>%
  ungroup() %>%
  mutate(value = ifelse(value == 1, "Present", "Absent")) %>%
  spread(value, n, fill = 0) %>%
  filter(Present != 0) %>%
  mutate(Total = Present + Absent) %>%
  filter(Total > 2) %>%
  rowwise() %>%
  mutate(Percent = round(Present/Total*100, 1)) %>%
  ungroup() %>%
  group_by(ST, species) %>%
  mutate(no_of_mechanisms = n()) %>%
  arrange(ST, species)

2.25 Significant differences between the amount of resistant isolates

cip_nal_res <- mic_data %>%
  left_join(isolate_data[,c("id","species")], by = "id") %>%
  mutate(SMX_R = ifelse(as.numeric(SMX) > 64, 1, 0),
         TMP_R = ifelse(as.numeric(TMP) > 2, 1, 0),
         CIP_R = ifelse(as.numeric(CIP) > 0.06, 1, 0),
         TET_R = ifelse(as.numeric(TET) > 8, 1, 0),
         NAL_R = ifelse(as.numeric(NAL) > 16, 1, 0),
         CTX_R = ifelse(as.numeric(CTX) > 0.25, 1, 0),
         CHL_R = ifelse(as.numeric(CHL) > 16, 1, 0),
         AMP_R = ifelse(as.numeric(AMP) > 8, 1, 0),
         GEN_R = ifelse(as.numeric(GEN) > 2, 1, 0)) %>%
  select(species,contains("_R")) %>%
  gather(compound, value, -species) %>%
  mutate(compound = sub("_R", "", compound)) %>%
  group_by_all() %>%
  dplyr::count() %>%
  filter(compound %in% c("CIP","NAL")) %>%
  ungroup() %>%
  mutate(value = ifelse(value == 0, "Absent", "Present")) %>%
  spread(value, n, fill = 0) %>%
  select(compound, species, Present, Absent) %>%
  dplyr::rename("key" = compound) %>%
  split(f = .$key)
  
p_values_res <- suppressWarnings(lapply(cip_nal_res, function(x) do_chisq_test(x))) %>%
  bind_rows() %>%
  left_join(percent_am_res[,c("species","compound","Percent")], by = c("species1" = "species",
                                                                       "gene" = "compound")) %>%
  dplyr::rename("Percent_species1" = Percent) %>%
  left_join(percent_am_res[,c("species","compound","Percent")], by = c("species2" = "species",
                                                                       "gene" = "compound")) %>%
  dplyr::rename("Percent_species2" = Percent,
                "Compound" = gene) %>%
  select(Compound, species1, Percent_species1, species2, Percent_species2, p.value)

p_values_res
rest_res <- mic_data %>%
  left_join(isolate_data[,c("id","species")], by = "id") %>%
  mutate(SMX_R = ifelse(as.numeric(SMX) > 64, 1, 0),
         TMP_R = ifelse(as.numeric(TMP) > 2, 1, 0),
         CIP_R = ifelse(as.numeric(CIP) > 0.06, 1, 0),
         TET_R = ifelse(as.numeric(TET) > 8, 1, 0),
         NAL_R = ifelse(as.numeric(NAL) > 16, 1, 0),
         CTX_R = ifelse(as.numeric(CTX) > 0.25, 1, 0),
         CHL_R = ifelse(as.numeric(CHL) > 16, 1, 0),
         AMP_R = ifelse(as.numeric(AMP) > 8, 1, 0),
         GEN_R = ifelse(as.numeric(GEN) > 2, 1, 0)) %>%
  select(species,contains("_R")) %>%
  gather(compound, value, -species) %>%
  mutate(compound = sub("_R", "", compound)) %>%
  group_by_all() %>%
  dplyr::count() %>%
  filter(compound %not_in% c("CIP","NAL")) %>%
  ungroup() %>%
  mutate(value = ifelse(value == 0, "Absent", "Present")) %>%
  spread(value, n, fill = 0) %>%
  select(compound, species, Present, Absent) %>%
  dplyr::rename("key" = compound) %>%
  split(f = .$key)

p_values_res_rest <- suppressWarnings(lapply(rest_res, function(x) do_chisq_test(x))) %>%
  bind_rows() %>%
  left_join(percent_am_res[,c("species","compound","Percent")], by = c("species1" = "species",
                                                                       "gene" = "compound")) %>%
  dplyr::rename("Percent_species1" = Percent) %>%
  left_join(percent_am_res[,c("species","compound","Percent")], by = c("species2" = "species",
                                                                       "gene" = "compound")) %>%
  dplyr::rename("Percent_species2" = Percent,
                "Compound" = gene) %>%
  select(Compound, species1, Percent_species1, species2, Percent_species2, p.value)

p_values_res_rest

2.26 Correlation between specific mutations and qnr-genes

AddCol <- function(df, col_name) {
  # split rows by delimeters
  string_to_proc <- df %>% select(!!col_name) %>%
    base::unlist() %>% str_split(regex("\\, |\\,")) 
  # find unique entries
  unique_strings <- string_to_proc %>%
    base::unlist() %>% base::unique()
  # construct names of the new columns
  cols_names <- paste(col_name, unique_strings, sep = "_")
  # construct 0/1-content columns for each unique entry
  cols_content <- sapply(function(i) {
    as.integer(base::unlist(lapply(function(Z) any(Z %in% unique_strings[i]), 
                             X = string_to_proc)))
  }, X = seq_along(unique_strings))
  res <- data.frame(cols_content)
  names(res) <- cols_names
  return(res)
}

create_corr_matrix <- function(df) {
  df <- df %>%
    as.matrix %>%
    cor(method = "spearman") %>%
    as.data.frame %>%
    rownames_to_column(var = "var1") %>%
    gather(var2, value, -var1) %>%
    mutate(var1 = gsub("(.*?)_R$", "\\1", var1),
           var2 = gsub("(.*?)_R$", "\\1", var2))
  return(df)
}

col_proc <- c("gyrA","gyrB","parC","parE")

cols_to_add <- sapply(function(i) {AddCol(df = mut_quant, col_name = col_proc[i])},
                      X = seq_along(col_proc)) %>%
  bind_cols() %>%
  select(-c(gyrA_0, parC_0, gyrB_0, parE_0)) %>%
  mutate_all(funs(as.numeric)) %>%
  {. ->> tot_mut_report} %>%
  select(gyrA_S83L, gyrA_D87N, parC_S80I) %>%
  mutate(gyrA_S83L_D87N = ifelse(gyrA_S83L == 1 & gyrA_D87N == 1, 1, 0),
         gyrA2_parC1 = ifelse(gyrA_S83L == 1 & gyrA_D87N == 1 & parC_S80I == 1, 1, 0))
  

test <- mut_quant %>%
  cbind(tot_mut_report) %>%
  select(-contains("mut")) %>%
  select(-c(gyrA, gyrB, parC, parE)) %>%
  left_join(acquired_report, by = "id") %>%
  mutate_at(vars(-id),
            funs(as.numeric)) %>%
  mutate(qnr = ifelse(rowSums(.[,21:27]) == 0, 0, 1)) %>%
  select(-id)


test2 <- create_corr_matrix(test)
## Warning in cor(., method = "spearman"): the standard deviation is zero
cor.test(test$gyrA_S83L, test$qnr)
## 
##  Pearson's product-moment correlation
## 
## data:  test$gyrA_S83L and test$qnr
## t = -15.973, df = 278, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.7483113 -0.6252511
## sample estimates:
##        cor 
## -0.6917707

3 Figures

3.1 Sequence types per animal group

palette4 <- c("Livestock" = "#80b1d3",
              "Wild" = "#fb8072")

mlst_data[,c("id","ST")] %>%
  left_join(isolate_data[,c("id","species")], by = "id") %>%
  mutate(animal_group = ifelse(species %in% c("Broiler","Pig"), "Livestock", "Wild")) %>%
  group_by(ST, animal_group) %>%
  dplyr::count() %>%
  group_by(ST) %>%
  mutate(total = dplyr::first(n) + dplyr::last(n)) %>%
  filter(total > 4) %>%
  ggplot(aes(factor(ST), n, fill = animal_group))+
  geom_col(color = "black")+
  labs(x = "Sequence Types")+
  scale_fill_manual(values = palette4)+
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.45),
        legend.title = element_blank())

3.2 Resistance mechanisms per sequence type

genes <- c(names(mut_report), names(acquired_report), "qnr")
genes <- genes[!genes %in% c("id","nt_change","qnrA1","qnrB19","qnrB6","qnrB60","qnrS1","qnrS2","qnrS4")]

no_of_mlst <- mlst_data[,c("id","ST")] %>%
  left_join(isolate_data[,c("id","species")], by = "id") %>%
  group_by(ST, species) %>%
  dplyr::count() %>%
  group_by(ST) %>%
  mutate(group_n = sum(n)) %>%
  filter(group_n > 3)


mlst_mechanisms_report <- mut_report %>%
  left_join(acquired_report, by = "id") %>%
  mutate_at(vars(qnrA1,qnrB19,qnrB6,qnrB60,qnrS1,qnrS2,qnrS4),
            funs(as.numeric(.))) %>%
  mutate(qnr = ifelse(rowSums(.[,14:20]) == 0, 0, 1)) %>%
  select(-c(qnrA1,qnrB19,qnrB6,qnrB60,qnrS1,qnrS2,qnrS4,nt_change)) %>%
  gather(gene, value, -id) %>%
  mutate(present = ifelse(value == 1, gene, NA),
         ref = 1:n()) %>%
  spread(gene, value) %>%
  group_by(id) %>%
  summarise_all(funs(func_paste)) %>%
  left_join(mlst_data[c("id","ST")], by = "id") %>%
  select(-c(ref, id)) %>%
  ungroup() %>%
  mutate_at(vars(-present, ST), .funs = as.numeric) %>%
  mutate(none = ifelse(rowSums(.[,genes]) == 0, 1, 0),
         present = ifelse(present == "", "None", present)) %>%
  gather(gene, value, -c(present,ST)) %>%
  mutate(gene = ifelse(value == 0 & gene != "None", NA, gene),
         value = ifelse(gene == "None" & value == 0, NA, value)) %>%
  na.omit() %>%
  group_by(ST, gene, value) %>%
  dplyr::count() %>%
  filter(ST %in% no_of_mlst$ST)

palette <- c("Broiler" = "#4575b4",
             "Pig" = "#74add1",
             "Red fox" = "#f46d43",
             "Wild bird" = "#fdae61")

p1 <- ggplot(mlst_mechanisms_report, aes(factor(ST), gene, size = n))+
  geom_point()+
  guides(size = FALSE)+
  labs(x = "Sequence Types",
       y = "Genes")+
  theme(axis.text.x = element_text(size = 10),
        axis.text.y = element_text(size = 10),
        axis.ticks.x = element_blank())

p2 <- ggplot(no_of_mlst, aes(factor(ST), n, fill = species))+
  geom_col(color = "black")+
  scale_fill_manual(values = palette)+
  guides(fill = FALSE)+
  theme(axis.title.x = element_blank(),
        axis.ticks.x = element_blank(),
        axis.text.x = element_blank())

plot_grid(p2, p1, nrow = 2, align = "v")

3.3 Percent resistance mechanisms in total

total_mechanisms <- mut_report %>%
  select(-nt_change) %>%
  left_join(acquired_report, by = "id") %>%
  mutate_at(vars(qnrA1,qnrB19,qnrB6,qnrB60,qnrS1,qnrS2,qnrS4),
            funs(as.numeric(.))) %>%
  mutate(qnr = ifelse(rowSums(.[,13:19]) == 0, 0, 1)) %>%
  select(-c(qnrA1,qnrB19,qnrB6,qnrB60,qnrS1,qnrS2,qnrS4)) %>%
  gather(gene, result, -id) %>%
  group_by(gene, result) %>%
  dplyr::count() %>%
  ungroup() %>%
  mutate(result = ifelse(result == 1, "Present", "Absent")) %>%
  spread(result, n, fill = 0) %>%
  rowwise() %>%
  mutate(Total = Present + Absent,
         Percent = round((Present/Total)*100, 1),
         lwr = round(get_binCI(Present, Total)[1], 1),
         upr = round(get_binCI(Present, Total)[2], 1),
         type = ifelse(gene %in% c("qnr","qepA4"), "Acquired","Intrinsic"))

palette2 <- c("Acquired" = "#8dd3c7",
              "Intrinsic" = "#80b1d3")

ggplot(total_mechanisms, aes(reorder(gene, -Percent), Percent, fill = type)) +
  geom_col(color = "black") +
  geom_errorbar(aes(ymin = lwr, ymax = upr),
                width = 0.5)+
  scale_fill_manual(values = palette2) +
  labs(x = "Genes",
       y = "Percent (%) Isolates",
       fill = NULL)

3.4 Percent QREC isolates with mutations in intrinsic genes

gene_names <- names(mut_report)

gene_names <- gene_names[-c(1,2)]

palette <- c("Broiler" = "#4575b4",
             "Pig" = "#74add1",
             "Red fox" = "#f46d43",
             "Wild bird" = "#fdae61")

mut_report %>%
    left_join(isolate_data, by = "id") %>%
    gather(key, value, gene_names) %>%
    group_by(key, value, species) %>%
    dplyr::count() %>%
    ungroup() %>%
    mutate(value = if_else(value == 1, "Present", "Absent")) %>%
    spread(value, n, fill = 0) %>%
    rowwise() %>%
    mutate(
      Total = Present + Absent,
      Percent = round(Present / Total * 100, 1),
      lwr = round(get_binCI(Present, Total)[1], 1),
      upr = round(get_binCI(Present, Total)[2], 1)
    ) %>%
    mutate(species = factor(species,
                            levels = c("Broiler","Pig","Wild bird","Red fox"))) %>%
    ggplot(aes(reorder(key, -Percent), Percent, fill = species)) +
    geom_col(color = "black",
             position = position_dodge(1)) +
    geom_errorbar(
      aes(ymin = lwr, ymax = upr),
      width = 0.6,
      position = position_dodge(1)
    ) +
    scale_fill_manual(values = palette) +
    labs(y = "Percent (%) of isolates") +
    guides(fill = FALSE) +
    theme_classic() +
    theme(
      axis.text.x = element_text(
        angle = 90,
        size = 12,
        hjust = 1,
        vjust = 0.4
      ),
      axis.title.x = element_blank(),
      axis.text.y = element_text(size = 12),
      axis.title.y = element_text(size = 14),
      strip.text = element_text(size = 12)
    ) +
    facet_wrap(~ species)

3.5 Percent QRDR mutations per animal species

palette <- c("Broiler" = "#4575b4",
             "Pig" = "#74add1",
             "Red fox" = "#f46d43",
             "Wild bird" = "#fdae61")

suppressWarnings(clean_mut_data %>%
  mutate(ref_ctg_change = ifelse(ref_ctg_change == ".", "", ref_ctg_change),
         ref_nt = ifelse(ref_nt == ".", "", ref_nt),
         ctg_nt = ifelse(ctg_nt == ".", "", ctg_nt),
         ref_ctg_nt_change = paste(ref_nt, ctg_nt, sep = "-"),
         mutation = paste(ref_ctg_change, ref_ctg_nt_change, sep = "/"),
         mutation = ifelse(mutation == "/-", "0", mutation)) %>%
  select(ref, gene_names, flag, mutation) %>%
  filter(flag %in% flag_selection,
         gene_names %in% c("gyrA","gyrB","parC","parE")) %>%
  mutate(id = 1:n()) %>%
  spread(gene_names, mutation) %>%
  group_by(ref) %>%
  summarise_all(funs(func_paste2)) %>%
  dplyr::select(-c(id, flag)) %>%
  left_join(id_data, by = "ref") %>%
  dplyr::select(id, everything(), -ref) %>%
  filter(id %not_in% c("2016-17-565-1-S",
                       "2016-02-428-2-S",
                       "2016-02-486-2-S",
                       "2016-02-732-2-S",
                       "2014-01-1741-1-S")) %>%
  gather(gene, mut, -id) %>%
  mutate(mut = ifelse(mut == "" | mut == "." | is.na(mut) == TRUE, 0, mut),
         result = ifelse(mut != 0, 1, 0),
         result = as.integer(result),
         type = "mut") %>%
  separate(mut, into = paste("v", 1:12, sep = "_"), sep = ",") %>%
  gather(mut, value, paste("v", 1:12, sep = "_")) %>%
  separate(value, into = c("aa_change","nt_change"), sep = "/") %>%
  mutate(aa_change = ifelse(gene == "parC" & aa_change == "S58I", "S80I", aa_change)) %>%
  select(-mut) %>%
  dplyr::rename("mut" = aa_change) %>%
  filter(is.na(nt_change) == FALSE) %>%
  fix_gyr_par_results(.) %>%
  filter(result_total == 1) %>%
  left_join(isolate_data[c("id","species")], by = "id") %>%
  group_by(gene, mut, species) %>%
  dplyr::count() %>%
  rowwise() %>%
  mutate(Total = case_when(species == "Broiler" ~ 87,
                           species == "Pig" ~ 75,
                           species == "Red fox" ~ 52,
                           species == "Wild bird" ~ 66),
         Percent = round(n/Total * 100,1),
         lwr = round(get_binCI(n, Total)[1], 1),
         upr = round(get_binCI(n, Total)[2], 1)) %>%
  dplyr::rename("Mutation" = mut)) %>%
  ggplot(aes(reorder(Mutation, -Percent), Percent, fill = species))+
  geom_col(color = "black", position = position_dodge(0.9))+
  geom_errorbar(aes(ymin = lwr, ymax = upr),
                position = position_dodge(0.9),
                width = 0.5)+
  scale_fill_manual(values = palette)+
  guides(fill = FALSE)+
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.3),
        axis.title.x = element_blank())+
  facet_wrap(~ gene, scale = "free_x")

3.6 Percent QREC isolates with Qnr genes per species

qnr_genes <- c("qnrA1","qnrB19","qnrB6","qnrB60","qnrS1","qnrS2","qnrS4")

acquired_report %>%
    left_join(isolate_data, by = "id") %>%
    gather(key, value, qnr_genes) %>%
    group_by(key, value, species) %>%
    dplyr::count() %>%
    ungroup() %>%
    mutate(value = if_else(value == 1, "Present", "Absent")) %>%
    spread(value, n, fill = 0) %>%
    rowwise() %>%
    mutate(
      Total = Present + Absent,
      Percent = round(Present / Total * 100, 1)
    ) %>%
    ggplot(aes(key, Percent, fill = species)) +
    geom_col(color = "black",
             position = position_dodge(0.9)) +
    scale_fill_manual(values = palette) +
    labs(y = "Percent (%) of isolates",
         fill = NULL) +
    theme_classic() +
    theme(
      axis.text = element_text(size = 12),
      axis.title.y = element_text(size = 14),
      legend.text = element_text(size = 12),
      axis.title.x = element_blank(),
      legend.justification = c(0, 1),
      legend.position = c(0.82, 0.97)
    )

3.7 Correlation plot for resistance mechanisms

total_df <- acquired_report %>%
  left_join(mut_report, by = "id") %>%
  left_join(isolate_data[c("id","species")], by = "id") %>%
  select(-c(nt_change, id)) %>%
  mutate_at(vars(-species),
            funs(as.numeric)) %>%
  mutate(qnr = ifelse(rowSums(.[,2:8]) > 0, 1, 0)) %>%
  {. ->> total_resistance_report} %>%
  split(., f = .$species) %>%
  lapply(., function(x) select(x, -species))
  

corr_matrices <- lapply(total_df, function(x) create_corr_matrix(x)) %>%
  bind_rows(.id = "species") %>%
  mutate(gene1 = gsub("(.*?)_.+", "\\1", var1),
           gene2 = gsub("(.*?)_.+", "\\1", var2),
           var1 = gsub(".+_(.*?)", "\\1", var1),
           var2 = gsub(".+_(.*?)", "\\1", var2),
           type1 = case_when(gene1 == "gyrA" ~ 1,
                          gene1 == "gyrB" ~ 2,
                          gene1 == "parC" ~ 3,
                          gene1 == "parE" ~ 4,
                          gene1 == "marR" ~ 5,
                          gene1 == "marA" ~ 6,
                          gene1 == "rpoB" ~ 7,
                          gene1 == "soxS" ~ 8,
                          gene1 == "robA" ~ 9,
                          gene1 == "gadX" ~ 10,
                          gene1 == "qnrA1" ~ 11,
                          gene1 == "qnrB19" ~ 12,
                          gene1 == "qnrB6" ~ 13,
                          gene1 == "qnrB60" ~ 14,
                          gene1 == "qnrS1" ~ 15,
                          gene1 == "qnrS2" ~ 16,
                          gene1 == "qnrS4" ~ 17,
                          gene1 == "qnr" ~ 18,
                          gene1 == "qepA4" ~ 19),
           type2 = case_when(gene2 == "gyrA" ~ 1,
                          gene2 == "gyrB" ~ 2,
                          gene2 == "parC" ~ 3,
                          gene2 == "parE" ~ 4,
                          gene2 == "marR" ~ 5,
                          gene2 == "marA" ~ 6,
                          gene2 == "rpoB" ~ 7,
                          gene2 == "soxS" ~ 8,
                          gene2 == "robA" ~ 9,
                          gene2 == "gadX" ~ 10,
                          gene2 == "qnrA1" ~ 11,
                          gene2 == "qnrB19" ~ 12,
                          gene2 == "qnrB6" ~ 13,
                          gene2 == "qnrB60" ~ 14,
                          gene2 == "qnrS1" ~ 15,
                          gene2 == "qnrS2" ~ 16,
                          gene2 == "qnrS4" ~ 17,
                          gene2 == "qnr" ~ 18,
                          gene2 == "qepA4" ~ 19))

cols <- c("#e7f0fa","#c9e2f6","#95cbee","#0099dc","#4ab04a",
          "#ffd73e","#eec73a","#e29421","#f05336","#ce472e")


ggplot(corr_matrices, aes(reorder(var1, type1), reorder(var2, type2), fill = value))+
  geom_tile(color = "white")+
  scale_fill_gradient(low = "black",
                      high = "white",
                      na.value = "grey95")+
  labs(fill = NULL)+
  theme_classic()+
  theme(axis.text = element_text(size = 8),
        axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.4),
        axis.title = element_blank())+
  coord_fixed()+
  facet_wrap(~species)

3.8 MIC-plots

total_report <- mut_report %>%
  left_join(acquired_report, by = "id") %>%
  dplyr::select(-nt_change) %>%
  mutate_at(vars(-id),
            funs(as.numeric(.))) %>%
  mutate(qnr = ifelse(rowSums(.[,13:19]) > 0, 1, 0)) %>%
  select(-c(qnrA1,qnrB19,qnrB6,qnrB60,qnrS1,qnrS2,qnrS4)) %>%
  gather(gene, value, -id) %>%
  mutate(present = ifelse(value == 1, gene, NA),
         ref = 1:n()) %>%
  spread(gene, value) %>%
  group_by(id) %>%
  summarise_all(funs(func_paste)) %>%
  select(-ref) %>%
  left_join(mic_data[c("id","CIP")], by = "id") %>%
  group_by(present,CIP) %>%
  dplyr::count() %>%
  ungroup() %>%
  rowwise() %>%
  mutate(present = ifelse(present == "", "None", present),
         total = 280,
         percent = round(n/total * 100, 1),
         lwr = round(get_binCI(n, total)[1], 1),
         upr = round(get_binCI(n, total)[2], 1)) %>%
  ungroup() %>%
  mutate(entries = sapply(strsplit(.$present, ","),
                          FUN = function(x) {length(x)}))

genes <- c(names(mut_report), names(acquired_report), "qnr")
genes <- genes[!genes %in% c("id","nt_change","qnrA1","qnrB19","qnrB6","qnrB60","qnrS1","qnrS2","qnrS4")]

n_plot <- ggplot(total_report, aes(reorder(present, entries), percent, fill = factor(CIP)))+
  geom_col(color = "black")+
  labs(y = "Percent (%) isolates")+
  scale_fill_viridis(discrete = TRUE)+
  guides(fill=guide_legend(ncol = 10))+
  theme(axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        axis.title.y = element_text(size = 10),
        axis.text.y = element_text(size = 8),
        axis.title.x = element_blank(),
        legend.text = element_text(size = 8),
        legend.title = element_blank(),
        legend.position = "top",
        legend.spacing.x = unit(0.2, "cm"),
        legend.justification = "center")

dot_report <- mut_report %>%
  left_join(acquired_report, by = "id") %>%
  select(-nt_change) %>%
  mutate_at(vars(-id),
            funs(as.numeric(.))) %>%
  mutate(qnr = ifelse(rowSums(.[,13:19]) > 0, 1, 0)) %>%
  select(-c(qnrA1,qnrB19,qnrB6,qnrB60,qnrS1,qnrS2,qnrS4)) %>%
  gather(gene, value, -id) %>%
  mutate(present = ifelse(value == 1, gene, NA),
         ref = 1:n()) %>%
  spread(gene, value) %>%
  group_by(id) %>%
  summarise_all(funs(func_paste)) %>%
  left_join(mic_data[c("id","CIP")], by = "id") %>%
  select(-c(ref, id)) %>%
  ungroup() %>%
  mutate_at(vars(-present, CIP), .funs = as.numeric) %>%
  mutate(none = ifelse(rowSums(.[,genes]) == 0, 1, 0),
         present = ifelse(present == "", "None", present)) %>%
  gather(gene, value, -c(present,CIP)) %>%
  mutate(gene = ifelse(value == 0 & gene != "None", NA, gene),
         value = ifelse(gene == "None" & value == 0, NA, value)) %>%
  na.omit() %>%
  mutate(entries = sapply(strsplit(.$present, ","),
                          FUN = function(x) {length(x)}))

dot_plot <- ggplot(dot_report, aes(reorder(present, entries), gene, fill = gene))+
  geom_point(size = 2)+
  scale_y_discrete(limits = c("gyrA","parC","parE","gadX","marA","marR","rpoB","soxS","qepA4","qnr"))+
  guides(fill = FALSE)+
  theme(axis.text.x = element_blank(),
        axis.text.y = element_text(size = 8),
        axis.title = element_blank(),
        axis.ticks.x = element_blank())

plot_grid(n_plot, dot_plot, nrow = 2, align = "v", rel_heights = c(3.5/5, 1.5/5))

3.9 Additional resistance levels per animal species

mic_data %>%
  left_join(isolate_data[,c("id","species")], by = "id") %>%
  mutate(SMX_R = ifelse(as.numeric(SMX) > 64, 1, 0),
         TMP_R = ifelse(as.numeric(TMP) > 2, 1, 0),
         CIP_R = ifelse(as.numeric(CIP) > 0.06, 1, 0),
         TET_R = ifelse(as.numeric(TET) > 8, 1, 0),
         NAL_R = ifelse(as.numeric(NAL) > 16, 1, 0),
         CTX_R = ifelse(as.numeric(CTX) > 0.25, 1, 0),
         CHL_R = ifelse(as.numeric(CHL) > 16, 1, 0),
         AMP_R = ifelse(as.numeric(AMP) > 8, 1, 0),
         GEN_R = ifelse(as.numeric(GEN) > 2, 1, 0)) %>%
  select(species,contains("_R")) %>%
  gather(compound, value, -species) %>%
  mutate(compound = sub("_R", "", compound)) %>%
  group_by_all() %>%
  dplyr::count() %>%
  ungroup() %>%
  mutate(value = ifelse(value == 0, "Absent", "Present")) %>%
  spread(value, n, fill = 0) %>%
  rowwise() %>%
  mutate(Total = Present + Absent,
         Percent = round((Present/Total)*100, 1),
         lwr = round(get_binCI(Present, Total)[1], 1),
         upr = round(get_binCI(Present, Total)[2], 1),
         species = factor(species, levels = c("Broiler","Pig","Wild bird","Red fox"))) %>%
  ggplot(aes(reorder(compound, -Percent), Percent, fill = species))+
  geom_col(color = "black")+
  geom_errorbar(aes(ymin = lwr, ymax = upr),
                width = 0.6)+
  guides(fill = FALSE)+
  scale_fill_manual(values = palette)+
  theme(axis.title.x = element_blank(),
        axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.4))+
  facet_wrap(~species)

3.10 CIP/NAL MIC-values per animal species

mic_data[,c("id","CIP")] %>%
  left_join(isolate_data[,c("id","species")], by = "id") %>%
  group_by(CIP,species) %>%
  dplyr::count() %>%
  ggplot(aes(factor(CIP), n, fill = species))+
  geom_col(color = "black", position = position_fill())+
  labs(y = "Percent isolates",
       x = "CIP MIC")+
  scale_fill_manual(values = palette)+
  scale_y_continuous(labels = percent_format())+
  theme(legend.title = element_blank())

mic_data[,c("id","NAL")] %>%
  left_join(isolate_data[,c("id","species")], by = "id") %>%
  group_by(NAL,species) %>%
  dplyr::count() %>%
  ggplot(aes(factor(NAL), n, fill = species))+
  geom_col(color = "black", position = position_fill())+
  labs(y = "Percent isolates",
       x = "NAL MIC")+
  scale_fill_manual(values = palette)+
  scale_y_continuous(labels = percent_format())+
  theme(legend.title = element_blank())

4 Dendrograms

4.1 Number of NA’s per isolate in cgMLST allele matrix

The red outliers have been excluded from the cgMLST analysis due to too many missing alleles

cgMLSTdata <- read.table("../data/cgMLST.txt",
                         sep = "\t",
                         colClasses = "factor",
                         header = T)

cgMLSTdata[cgMLSTdata == "NaN"] <- NA

cgMLSTdata2 <- cgMLSTdata %>%
  mutate(n_NA = apply(., 1, function(x) sum(is.na(x))),
         group = 1)

ggplot(cgMLSTdata2, aes(factor(group), n_NA)) +
  geom_boxplot(outlier.size = 2) +
  geom_point(data = cgMLSTdata2[cgMLSTdata2$n_NA > 500,], aes(factor(group), n_NA),
             pch = 21,
             size = 2,
             fill = "red") +
  labs(x = NULL,
       y = "Number of NA's per isolate") +
  theme_classic() +
  theme(axis.text.x = element_blank(),
        axis.ticks.x = element_blank())

4.2 cgMLST-tree total

palette3 <- c("1-A" = "#fc8d59",
              "1-I" = "#80b1d3",
              "0" = "grey95")

qr_genes <- mut_report[c("id","gyrA","parC","parE","rpoB","marA","marR","gadX","soxS")] %>%
  mutate_at(vars(-id),
            funs(ifelse(. == "1", "1-I", "0")))
  
ac_genes <- acquired_report %>%
  mutate_at(vars(-id),
            funs(as.numeric)) %>%
  mutate(qnr = ifelse(rowSums(.[,3:9]) != 0 , 1, 0)) %>%
  select(id, qepA4, qnr) %>%
  mutate_at(vars(-id),
            funs(ifelse(. == "1", "1-A", "0")))

total_data <- qr_genes %>%
  left_join(ac_genes, by = "id")

total_data_id <- total_data %>%
  column_to_rownames("id")

metadata_total <- mlst_data[,c("id","ST")] %>%
  left_join(isolate_data[,c("id", "species")], by = "id")

cgMLSTdata_complete <- cgMLSTdata2 %>%
  filter(n_NA < 500) %>%
  select(-n_NA) %>%
  column_to_rownames("Genome")

total_tree <- as.phylo(hclust(daisy(cgMLSTdata_complete, metric = "gower"), method = "average"))

total_tree_annotated <- ggtree(total_tree,
                               layout = "circular",
                               color = "#808080",
                               size = 0.5) %<+% metadata_total +
                               geom_tippoint(aes(color = species),
                               size = 3) +
                               geom_tiplab2(aes(label = ST),
                               offset = 0.01,
                               size = 4) +
  scale_color_manual(values = palette)

total_tree_opened <- rotate_tree(open_tree(total_tree_annotated, 8), 93)

gheatmap(total_tree_opened,
         total_data_id,
         offset = 0.04,
         width = 0.3,
         colnames_offset_y = 2.5,
         colnames_position = "top",
         font.size = 5) +
  scale_fill_manual(values = palette3)

4.3 Roary tree

roary_tree <- read.tree("../data/Roary/accessory_binary_genes.fa.newick")

tree_refs <- roary_tree$tip.label

id_data2 <- id_data %>%
  mutate(test = ref %in% tree_refs) %>%
  filter(test == TRUE)

id_df <- left_join(data.frame(ref = tree_refs), id_data2, by = "ref") %>%
  mutate(id = ifelse(is.na(id) == TRUE, ref, id))

roary_tree$tip.label <- id_df$id

total_data2 <- total_data %>%
  filter(id %in% id_df$id) %>%
  column_to_rownames("id")

metadata_total2 <- metadata_total %>%
  filter(id %in% id_df$id)


roary_tree_colored <- ggtree(roary_tree,
       layout = "circular",
       color = "#808080",
       size = 1) %<+% metadata_total2 +
  geom_tiplab2(aes(label = ST, color = species),
               offset = 0.01,
               size = 6,
               align = TRUE) +
  scale_color_manual(values = palette)

total_tree_opened2 <- rotate_tree(open_tree(roary_tree_colored, 8), 93)

gheatmap(total_tree_opened2,
         total_data2,
         offset = 0.065,
         width = 0.3,
         colnames_offset_y = 1.9,
         colnames_position = "top",
         font.size = 5) +
  scale_fill_manual(values = palette3)

4.4 cgMLST-tree per animal species

palette3 <- c("1-A" = "#fc8d59",
              "1-I" = "#80b1d3",
              "0" = "grey95")

qr_genes <- mut_report[c("id","gyrA","parC","parE","rpoB","marA","marR","gadX","soxS")] %>%
  mutate_at(vars(-id),
            funs(ifelse(. == "1", "1-I", "0")))
  
ac_genes <- acquired_report %>%
  mutate_at(vars(-id),
            funs(as.numeric)) %>%
  mutate(qnr = ifelse(rowSums(.[,3:9]) != 0 , 1, 0)) %>%
  select(id, qepA4, qnr) %>%
  mutate_at(vars(-id),
            funs(ifelse(. == "1", "1-A", "0")))

cgMLSTdata <- read.table("../data/cgMLST.txt",
                         sep = "\t",
                         colClasses = "factor",
                         header = T)

cgMLSTdata[cgMLSTdata == "NaN"] <- NA

cgMLSTdata2 <- cgMLSTdata %>%
  mutate(n_NA = apply(., 1, function(x) sum(is.na(x)))) %>%
  filter(n_NA < 500) %>%
  select(-n_NA) %>%
  column_to_rownames("Genome")

broiler_iso <- isolate_data[c("id", "species")] %>%
  filter(species == "Broiler") %>%
  filter(id %not_in% c("2016-17-565-1-S",
                       "2016-02-428-2-S",
                       "2016-02-486-2-S",
                       "2016-02-732-2-S",
                       "2014-01-1741-1-S"))

pig_iso <- isolate_data[c("id", "species")] %>%
  filter(species == "Pig") %>%
  filter(id %not_in% c("2016-17-565-1-S",
                       "2016-02-428-2-S",
                       "2016-02-486-2-S",
                       "2016-02-732-2-S",
                       "2014-01-1741-1-S"))

fox_iso <- isolate_data[c("id", "species")] %>%
  filter(species == "Red fox") %>%
  filter(id %not_in% c("2016-17-565-1-S",
                       "2016-02-428-2-S",
                       "2016-02-486-2-S",
                       "2016-02-732-2-S",
                       "2014-01-1741-1-S"))

bird_iso <- isolate_data[c("id", "species")] %>%
  filter(species == "Wild bird") %>%
  filter(id %not_in% c("2016-17-565-1-S",
                       "2016-02-428-2-S",
                       "2016-02-486-2-S",
                       "2016-02-732-2-S",
                       "2014-01-1741-1-S"))

rawdata_broiler <- cgMLSTdata2 %>%
  rownames_to_column("id") %>%
  filter(id %in% broiler_iso$id) %>%
  dplyr::rename("Genome" = `id`) %>%
  select(Genome, everything()) %>%
  left_join(mlst_data[c("id","ST")], by = c("Genome" = "id")) %>%
  {. ->> data_broiler } %>%
  select(-ST)

rawdata_pig <- cgMLSTdata2 %>%
  rownames_to_column("id") %>%
  filter(id %in% pig_iso$id) %>%
  dplyr::rename("Genome" = `id`) %>%
  select(Genome, everything()) %>%
  left_join(mlst_data[c("id","ST")], by = c("Genome" = "id")) %>%
  {. ->> data_pig } %>%
  select(-ST)

rawdata_fox <- cgMLSTdata2 %>%
  rownames_to_column("id") %>%
  filter(id %in% fox_iso$id) %>%
  dplyr::rename("Genome" = `id`) %>%
  select(Genome, everything()) %>%
  left_join(mlst_data[c("id","ST")], by = c("Genome" = "id")) %>%
  {. ->> data_fox } %>%
  select(-ST)

rawdata_bird <- cgMLSTdata2 %>%
  rownames_to_column("id") %>%
  filter(id %in% bird_iso$id) %>%
  dplyr::rename("Genome" = `id`) %>%
  select(Genome, everything()) %>%
  left_join(mlst_data[c("id","ST")], by = c("Genome" = "id")) %>%
  {. ->> data_bird } %>%
  select(-ST)

write.table(rawdata_broiler, "../data/cgMLST_broiler.txt", sep = "\t", row.names = FALSE)
write.table(rawdata_pig, "../data/cgMLST_pig.txt", sep = "\t", row.names = FALSE)
write.table(rawdata_fox, "../data/cgMLST_fox.txt", sep = "\t", row.names = FALSE)
write.table(rawdata_bird, "../data/cgMLST_bird.txt", sep = "\t", row.names = FALSE)

cgMLSTdata_broiler <- read.table("../data/cgMLST_broiler.txt",
                                 sep = "\t",
                                 row.names=1,
                                 colClasses = "factor",
                                 header = T)

cgMLSTdata_broiler[cgMLSTdata_broiler == "NaN"] <- NA

cgMLST_tree_broiler <- as.phylo(hclust(daisy(cgMLSTdata_broiler,
                                             metric="gower"), method = "average"))

cgMLSTdata_pig <- read.table("../data/cgMLST_pig.txt",
                             sep = "\t",
                             row.names=1,
                             colClasses = "factor",
                             header = T)

cgMLSTdata_pig[cgMLSTdata_pig == "NaN"] <- NA

cgMLST_tree_pig <- as.phylo(hclust(daisy(cgMLSTdata_pig,
                                         metric="gower"), method = "average"))

cgMLSTdata_fox <- read.table("../data/cgMLST_fox.txt", 
                             sep = "\t", 
                             row.names=1, 
                             colClasses = "factor", 
                             header = T)

cgMLSTdata_fox[cgMLSTdata_fox == "NaN"] <- NA

cgMLST_tree_fox <- as.phylo(hclust(daisy(cgMLSTdata_fox,
                                         metric="gower"), method = "average"))

cgMLSTdata_bird <- read.table("../data/cgMLST_bird.txt", 
                              sep = "\t", 
                              row.names=1, 
                              colClasses = "factor", 
                              header = T)

cgMLSTdata_bird[cgMLSTdata_bird == "NaN"] <- NA

cgMLST_tree_bird <- as.phylo(hclust(daisy(cgMLSTdata_bird,
                                          metric="gower"), method = "average"))

broiler_tree <- ggtree(cgMLST_tree_broiler,
                       layout = "circular",
                       color = "#808080",
                       size = 1) %<+% data_broiler +
  geom_tippoint(size = 4,
                color = "#4575b4")+
  geom_tiplab2(aes(label = ST),
               offset = 0.01,
               size = 8)+
  ggtitle("Broiler")+
  theme(plot.title = element_text(size = 40))

pig_tree <- ggtree(cgMLST_tree_pig,
                   layout = "circular",
                   color = "#808080",
                   size = 1) %<+% data_pig +
  geom_tippoint(size = 4,
                color = "#74add1")+
    geom_tiplab2(aes(label = ST),
               offset = 0.01,
               size = 8)+
  ggtitle("Pig")+
  theme(plot.title = element_text(size = 40))

fox_tree <- ggtree(cgMLST_tree_fox,
                   layout = "circular",
                   color = "#808080",
                   size = 1) %<+% data_fox +
  geom_tippoint(size = 4,
                color = "#f46d43")+
    geom_tiplab2(aes(label = ST),
               offset = 0.01,
               size = 8)+
  ggtitle("Red fox")+
  theme(plot.title = element_text(size = 40))

bird_tree <- ggtree(cgMLST_tree_bird,
                    layout = "circular",
                    color = "#808080",
                    size = 1) %<+% data_bird +
  geom_tippoint(size = 4,
                color = "#fdae61")+
    geom_tiplab2(aes(label = ST),
               offset = 0.01,
               size = 8)+
  ggtitle("Wild bird")+
  theme(plot.title = element_text(size = 40))

total_data <- qr_genes %>%
  left_join(ac_genes, by = "id")

broiler_data <- total_data %>%
  filter(id %in% broiler_iso$id) %>%
  column_to_rownames("id")

pig_data <- total_data %>%
  filter(id %in% pig_iso$id) %>%
  column_to_rownames("id")

fox_data <- total_data %>%
  filter(id %in% fox_iso$id) %>%
  column_to_rownames("id")

bird_data <- total_data %>%
  filter(id %in% bird_iso$id) %>%
  column_to_rownames("id")


fan_tree_broiler <- open_tree(broiler_tree, 10)

rotated_fan_tree_broiler <- rotate_tree(fan_tree_broiler, 93)

broiler_annotated_tree <- gheatmap(rotated_fan_tree_broiler,
                                   broiler_data,
                                   offset = 0.06,
                                   width = 0.5,
                                   colnames_offset_y = 0.7,
                                   colnames_position = "top",
                                   font.size = 8,
                                   color = "grey90") +
  scale_fill_manual(values = palette3) +
  guides(fill = FALSE)

fan_tree_pig <- open_tree(pig_tree, 10)

rotated_fan_tree_pig <- rotate_tree(fan_tree_pig, 92)

pig_annotated_tree <- gheatmap(rotated_fan_tree_pig, 
                               pig_data,
                               offset = 0.06, 
                               width = 0.5, 
                               colnames_offset_y = 0.55, 
                               colnames_position = "top", 
                               font.size = 8,
                               color = "grey90") +
  scale_fill_manual(values = palette3) +
  guides(fill = FALSE)


fan_tree_fox <- open_tree(fox_tree, 10)

rotated_fan_tree_fox <- rotate_tree(fan_tree_fox, 92)

fox_annotated_tree <- gheatmap(rotated_fan_tree_fox, 
                               fox_data, 
                               offset = 0.06, 
                               width = 0.5, 
                               colnames_offset_y = 0.2, 
                               colnames_position = "top", 
                               font.size = 8,
                               color = "grey90") +
  scale_fill_manual(values = palette3) +
  guides(fill = FALSE)


fan_tree_bird <- open_tree(bird_tree, 10)

rotated_fan_tree_bird <- rotate_tree(fan_tree_bird, 93)

bird_annotated_tree <- gheatmap(rotated_fan_tree_bird, 
                                bird_data, 
                                offset = 0.06, 
                                width = 0.5, 
                                colnames_offset_y = 0.4, 
                                colnames_position = "top", 
                                font.size = 8,
                                color = "grey90") +
  scale_fill_manual(values = palette3) +
  guides(fill = FALSE)

broiler_annotated_tree

pig_annotated_tree

fox_annotated_tree

bird_annotated_tree

5 Tree-stats

dist_broiler <- cophenetic.phylo(cgMLST_tree_broiler)
dist_broiler[dist_broiler > 0.95] <- NA

dist_pig <- cophenetic.phylo(cgMLST_tree_pig)
dist_pig[dist_pig > 0.95] <- NA

dist_fox <- cophenetic.phylo(cgMLST_tree_fox)
dist_fox[dist_fox > 0.95] <- NA

dist_bird <- cophenetic.phylo(cgMLST_tree_bird)
dist_bird[dist_bird > 0.95] <- NA

print(paste0("Broiler median distance: ",
             round(median(dist_broiler, na.rm = TRUE), 2)))
## [1] "Broiler median distance: 0.88"
print(paste0("Pig median distance: ",
             round(median(dist_pig, na.rm = TRUE), 2)))
## [1] "Pig median distance: 0.87"
print(paste0("Red fox median distance: ",
             round(median(dist_fox, na.rm = TRUE), 2)))
## [1] "Red fox median distance: 0.88"
print(paste0("Wild bird median distance: ",
             round(median(dist_bird, na.rm = TRUE), 2)))
## [1] "Wild bird median distance: 0.86"
par(mfrow = c(2,2))

x1 <- hist(dist_broiler,
     breaks = 20,
     plot = FALSE)

x1$density = x1$counts/sum(x1$counts)*100

plot(x1,
     freq = FALSE,
     main = "Broiler distance frequency",
     xlab = "Distance",
     ylim = c(0, 50))


x2 <- hist(dist_pig,
     breaks = 20,
     plot = FALSE)

x2$density = x2$counts/sum(x2$counts)*100

plot(x2,
     freq = FALSE,
     main = "Pig distance frequency",
     xlab = "Distance",
     ylim = c(0, 50))

x3 <- hist(dist_fox,
     breaks = 20,
     plot = FALSE)

x3$density = x3$counts/sum(x3$counts)*100

plot(x3,
     freq = FALSE,
     main = "Red fox distance frequency",
     xlab = "Distance",
     ylim = c(0, 50))


x4 <- hist(dist_bird,
     breaks = 20,
     plot = FALSE)

x4$density = x4$counts/sum(x4$counts)*100

plot(x4,
     freq = FALSE,
     main = "Wild bird distance frequency",
     xlab = "Distance",
     ylim = c(0, 50))